# -*- coding: utf-8 -*-#
#-------------------------------------------------------------------------------
# Creator:      王景渊
# Name:         test
# Description:  Crawls data-analysis job postings from m.lagou.com and appends them to lagou00.txt
# Author:       dell
# Date:         2019/5/4
#-------------------------------------------------------------------------------
import requests
from lxml import etree
import time
import random

def coo_regular(cookie):
    """Convert a raw ``Cookie`` header string into a dict.

    The string is split on ``;`` and each piece on its first ``=``, so a
    value that itself contains ``=`` is kept intact.  Keys are stripped of
    surrounding whitespace; single quotes are removed from values (values
    are otherwise left untouched).  When a name occurs more than once the
    last occurrence wins.

    Pieces without an ``=`` (for example the empty piece left behind by a
    trailing ``;`` or an empty input string) are skipped rather than
    raising ``ValueError``.

    :param cookie: cookie string such as ``'a=1; b=2'``.
    :return: dict mapping cookie names to their values.
    """
    coo = {}
    for k_v in cookie.split(';'):
        k, sep, v = k_v.partition('=')
        if not sep:
            # No '=' in this piece: nothing to store.
            continue
        coo[k.strip()] = v.replace("'", '')
    return coo

def csv_write(item):
    """Append one record to ``lagou00.txt`` (GBK encoded).

    A list/tuple of fields is written as a single CSV row, with quoting of
    embedded commas, quotes and newlines handled by the ``csv`` module.
    A plain ``str`` is written verbatim, exactly as passed.

    Writing is best-effort: an encoding, I/O or CSV error is reported on
    stdout and the record is dropped instead of aborting the crawl.

    :param item: sequence of field values, or an already formatted string.
    """
    import csv  # local import so this function does not depend on the file header

    with open('lagou00.txt', 'a', encoding='gbk', newline='') as f:
        try:
            if isinstance(item, str):
                f.write(item)
            else:
                # Passing the list straight to f.write() raises TypeError,
                # so sequences are serialised as one CSV row instead.
                csv.writer(f).writerow(item)
        except (OSError, UnicodeError, csv.Error) as e:
            print('write error! ', e)


def _xpath_first_text(tree, expr):
    """Return the stripped first result of XPath ``expr`` on ``tree``, or ''."""
    if tree is None:
        return ''
    found = tree.xpath(expr)
    return found[0].strip() if found else ''


def spider(list_url, timeout=10):
    """Crawl one search-result page and store every job listed on it.

    The JSON search page at ``list_url`` is fetched with the module-level
    ``headers``, ``prox`` and ``cookies``.  For each job in
    ``content.data.page.result`` the detail page is downloaded, three
    fields are extracted from its HTML and the combined record is passed
    to ``csv_write`` as a list.

    Failures are reported on stdout and skipped instead of raising:
    a failed or unparsable list request ends this page, a failed detail
    request leaves the three HTML-derived fields empty.

    :param list_url: URL of one page of the JSON search API.
    :param timeout: seconds to wait for each HTTP request.
    """
    try:
        r = requests.get(list_url, headers=headers, proxies=prox,
                         cookies=cookies, timeout=timeout)
    except requests.RequestException as e:
        print('list request failed: ', list_url, e)
        return
    # Random pause between requests to avoid hammering the site.
    time.sleep(random.randint(3, 7))
    try:
        data = r.json()['content']['data']['page']['result']
    except (ValueError, KeyError, TypeError) as e:
        # Not JSON, or JSON without the expected structure.
        print('unexpected list response: ', list_url, repr(e))
        return

    for comp in data or []:
        com_detail_url = 'http://m.lagou.com/jobs/' + str(comp['positionId']) + '.html'
        try:
            response = requests.get(com_detail_url, headers=headers, timeout=timeout)
            page_text = response.text
        except requests.RequestException as e:
            print('detail request failed: ', com_detail_url, e)
            page_text = ''
        time.sleep(random.randint(3, 7))

        try:
            sel = etree.HTML(page_text) if page_text else None
        except (etree.LxmlError, ValueError):
            sel = None

        # XPath string() yields a single string, not a node list, so it is
        # stripped directly instead of being indexed and joined.
        if sel is not None:
            miaoshu = sel.xpath('string(//*[@class="content"]/dd[2]/div)').strip()
        else:
            miaoshu = ''
        # NOTE(review): the predicate in the original expression was garbled;
        # div[1] is assumed here - confirm against the live page markup.
        xueli = _xpath_first_text(
            sel, '//*[@id="content"]/div[2]/div[1]/span[5]/span/text()')
        fuli = _xpath_first_text(sel, '//*[@id="content"]/div[2]/div[2]/text()')

        chengshi = comp['city']
        gongsi = comp['companyFullName']
        fabushijian = comp['createTime']
        zhiwei = comp['positionName']
        gongzi = comp['salary']
        item = [zhiwei, gongsi, xueli, fabushijian, gongzi, chengshi,
                com_detail_url, fuli, miaoshu]
        csv_write(item)
        print('正在抓取：', gongsi)


if __name__ == '__main__':
    # Script entry point.  ``headers``, ``cookies`` and ``prox`` are left as
    # module-level names because spider() reads them as globals.

    # Browser-like request headers for m.lagou.com.
    headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
               'Accept-Encoding': 'gzip, deflate',
               'Accept-Language': 'zh-CN,zh;q=0.8',
               'Cache-Control': 'max-age=0',
               'Connection': 'keep-alive',
               'Host': 'm.lagou.com',
               'Referer': 'https://m.lagou.com/',
               'Upgrade-Insecure-Requests': '1',
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
    # Hard-coded cookie string; coo_regular() turns it into the dict that
    # requests expects.
    # NOTE(review): these look like session cookies captured on 2019-05-04
    # and presumably need refreshing before a real run - confirm.
    cookie = 'JSESSIONID=ABAAABAAAFDABFG3106A577FCBFDD407F6606D97E46FE98; user_trace_token=20190504211144-40ea6004-98cc-436b-b149-3a0944a148bb; _gat=1; LGUID=20190504211144-2a3e822b-6e6e-11e9-8372-525400f775ce; _gat=1; PRE_UTM=; PRE_HOST=www.baidu.com; PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DMCYEVbA4f4KTqDze_knz_5V8tJdYBMXoOvaYJ_zmClW%26wd%3D%26eqid%3Df38afa5e000c2a6b000000065ccd8fe0; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; index_location_city=%E5%85%A8%E5%9B%BD; _ga=GA1.3.487531173.1556975503; _ga=GA1.2.487531173.1556975503; _gid=GA1.2.2008515558.1556975503; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1556975503,1556975605,1556975620; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1556975632; LGSID=20190504211326-66e736d1-6e6e-11e9-8373-525400f775ce; LGRID=20190504211353-77332978-6e6e-11e9-8373-525400f775ce; X_HTTP_TOKEN=229565b33c47820a3365796551fc0854f2aa23b3e4'
    cookies = coo_regular(cookie)
    # requests selects a proxy by URL scheme, so each mapping is keyed by
    # 'http' / 'https'; any other key is silently ignored.
    # NOTE(review): the addresses carry no port and may no longer be
    # reachable - verify before relying on them.
    proxies = [
        {'http': 'http://171.41.81.25'},
        {'http': 'http://171.41.81.115'},
        {'https': 'https://171.41.80.219'},
        {'http': 'http://119.49.85.178'},
    ]
    # One proxy mapping is picked at random and reused for the whole run.
    prox = random.choice(proxies)
    # Search-result pages 1..179, 15 results per page.
    all_url = ['http://m.lagou.com/search.json?city=%E5%85%A8%E5%9B%BD&positionName=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90&pageNo=' + str(x) + '&pageSize=15' for x in range(1, 180)]
    for url in all_url:
        spider(url)


